1 Setup

library(eurostat)
library(tidyverse)
library(plm)
library(lmtest)
library(sandwich)
library(stargazer)
library(corrplot)
library(kableExtra)
library(scales)
library(moments)

2 Data Sources

We use annual panel data from Eurostat for EU-27 countries covering the period 2013–2023. The dataset includes information on GDP per capita, migration, unemployment, and investment.

Variable Eurostat Code Description
GDP per capita sdg_08_10 Real GDP per capita in PPS
Net Migration migr_netmigr Net migration (immigration - emigration)
Population demo_gind Total population
Unemployment une_rt_a Unemployment rate (% of active population)
Investment Rate sdg_08_11 Gross fixed capital formation (% of GDP)

3 Data Preparation

3.1 GDP per Capita

gdp_pc <- get_eurostat(
id = "sdg_08_10",
time_format = "num"
) %>%
select(geo, TIME_PERIOD, values) %>%
rename(
time = TIME_PERIOD,
gdp_pc = values
) %>%
filter(time >= 2013, time <= 2023)
gdp_pc <- gdp_pc %>%
  distinct(geo, time, .keep_all = TRUE)

3.2 Migration Data

migration_raw <- get_eurostat(
  id = "migr_netmigr",
  time_format = "num"
)

migration <- migration_raw %>%
  select(geo, TIME_PERIOD, values) %>%
  rename(
    time = TIME_PERIOD,
    net_migration = values
  ) %>%
  filter(time >= 2013, time <= 2023)
migration <- migration %>%
  distinct(geo, time, .keep_all = TRUE)
population_raw <- get_eurostat(
  id = "demo_gind",
  time_format = "num"
)

population <- population_raw %>%
  select(geo, TIME_PERIOD, values) %>%
  rename(
    time = TIME_PERIOD,
    population = values
  ) %>%
  filter(time >= 2013, time <= 2023) %>%
  distinct(geo, time, .keep_all = TRUE)
migration <- migration %>%
  left_join(population, by = c("geo", "time")) %>%
  mutate(
    migration_rate = 1000 * net_migration / population
  )

3.3 Unemployment Data

unemployment_raw <- get_eurostat(
id = "une_rt_a",
time_format = "num"
)

unemployment <- unemployment_raw %>%
filter(sex == "T", age == "Y15-74") %>%
select(geo, TIME_PERIOD, values) %>%
rename(
time = TIME_PERIOD,
unemployment_rate = values
) %>%
filter(time >= 2013, time <= 2023)
unemployment <- unemployment %>%
  distinct(geo, time, .keep_all = TRUE)

3.4 Investment Data (Gross Fixed Capital Formation)

investment_raw <- get_eurostat(
  id = "sdg_08_11",
  time_format = "num"
)

investment <- investment_raw %>%
  select(geo, TIME_PERIOD, values) %>%
  rename(
    time = TIME_PERIOD,
    investment_rate = values
  ) %>%
  filter(time >= 2013, time <= 2023) %>%
  distinct(geo, time, .keep_all = TRUE)

4 Panel Construction

4.1 EU-27 Countries

eu27 <- c(
"AT","BE","BG","HR","CY","CZ","DK","EE","FI","FR","DE",
"EL","HU","IE","IT","LV","LT","LU","MT","NL","PL","PT",
"RO","SK","SI","ES","SE"
)

# Country names for better visualization
country_names <- c(
  "AT" = "Austria", "BE" = "Belgium", "BG" = "Bulgaria", "HR" = "Croatia",
  "CY" = "Cyprus", "CZ" = "Czechia", "DK" = "Denmark", "EE" = "Estonia",
  "FI" = "Finland", "FR" = "France", "DE" = "Germany", "EL" = "Greece",
  "HU" = "Hungary", "IE" = "Ireland", "IT" = "Italy", "LV" = "Latvia",
  "LT" = "Lithuania", "LU" = "Luxembourg", "MT" = "Malta", "NL" = "Netherlands",
  "PL" = "Poland", "PT" = "Portugal", "RO" = "Romania", "SK" = "Slovakia",
  "SI" = "Slovenia", "ES" = "Spain", "SE" = "Sweden"
)

panel_data <- gdp_pc %>%
  distinct(geo, time, .keep_all = TRUE) %>%
  filter(geo %in% eu27) %>%
  inner_join(
    migration %>% distinct(geo, time, .keep_all = TRUE) %>% filter(geo %in% eu27),
    by = c("geo", "time")
  ) %>%
  inner_join(
    unemployment %>% distinct(geo, time, .keep_all = TRUE) %>% filter(geo %in% eu27),
    by = c("geo", "time")
  ) %>%
  inner_join(
    investment %>% distinct(geo, time, .keep_all = TRUE) %>% filter(geo %in% eu27),
    by = c("geo", "time")
  ) %>%
  arrange(geo, time) %>%
  group_by(geo) %>%
  mutate(
    gdp_pc_growth = 100 * (log(gdp_pc) - log(dplyr::lag(gdp_pc))),
    country_name = country_names[geo]
  ) %>%
  ungroup() %>%
  filter(!is.na(gdp_pc_growth))

5 Final Dataset Check

panel_data %>%
summarise(
countries = n_distinct(geo),
years = n_distinct(time),
observations = n()
)
## # A tibble: 1 Ă— 3
##   countries years observations
##       <int> <int>        <int>
## 1        27    10          269
table(table(panel_data$geo))
## 
##  9 10 
##  1 26

6 Descriptive Statistics

6.1 Summary Statistics

desc_stats <- panel_data %>%
  select(gdp_pc_growth, migration_rate, unemployment_rate, investment_rate) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  summarise(
    N = n(),
    Mean = round(mean(Value, na.rm = TRUE), 2),
    SD = round(sd(Value, na.rm = TRUE), 2),
    Min = round(min(Value, na.rm = TRUE), 2),
    Q1 = round(quantile(Value, 0.25, na.rm = TRUE), 2),
    Median = round(median(Value, na.rm = TRUE), 2),
    Q3 = round(quantile(Value, 0.75, na.rm = TRUE), 2),
    Max = round(max(Value, na.rm = TRUE), 2),
    Skewness = round(moments::skewness(Value, na.rm = TRUE), 2),
    Kurtosis = round(moments::kurtosis(Value, na.rm = TRUE), 2),
    .groups = "drop"
  )

kable(desc_stats, caption = "Summary Statistics of Key Variables") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  footnote(general = "Skewness: 0 = symmetric; Kurtosis: 3 = normal distribution")
Summary Statistics of Key Variables
Variable N Mean SD Min Q1 Median Q3 Max Skewness Kurtosis
gdp_pc_growth 269 2.27 3.75 -12.15 0.64 2.25 4.43 21.07 -0.05 6.54
investment_rate 269 13.10 4.10 5.31 11.11 12.98 14.78 49.42 3.76 30.71
migration_rate 269 2.19 3.48 -4.49 0.18 1.36 3.01 17.70 1.66 6.77
unemployment_rate 269 7.60 4.09 2.00 5.10 6.60 8.70 26.60 1.99 7.96
Note:
Skewness: 0 = symmetric; Kurtosis: 3 = normal distribution

6.2 Between vs. Within Variation

For panel data analysis, it’s crucial to understand the decomposition of variance:

  • Between variation: Differences across countries (cross-sectional)
  • Within variation: Changes over time within each country (temporal)
# Calculate overall, between, and within statistics
variance_decomp <- panel_data %>%
  select(geo, gdp_pc_growth, migration_rate, unemployment_rate, investment_rate) %>%
  pivot_longer(-geo, names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  summarise(
    `Overall Mean` = round(mean(Value, na.rm = TRUE), 2),
    `Overall SD` = round(sd(Value, na.rm = TRUE), 2),
    .groups = "drop"
  )

# Between variation (variation of country means)
between_var <- panel_data %>%
  group_by(geo) %>%
  summarise(
    gdp_pc_growth = mean(gdp_pc_growth, na.rm = TRUE),
    migration_rate = mean(migration_rate, na.rm = TRUE),
    unemployment_rate = mean(unemployment_rate, na.rm = TRUE),
    investment_rate = mean(investment_rate, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(-geo, names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  summarise(
    `Between SD` = round(sd(Value, na.rm = TRUE), 2),
    .groups = "drop"
  )

# Within variation (variation around country means)
within_var <- panel_data %>%
  group_by(geo) %>%
  mutate(
    gdp_pc_growth_dm = gdp_pc_growth - mean(gdp_pc_growth, na.rm = TRUE),
    migration_rate_dm = migration_rate - mean(migration_rate, na.rm = TRUE),
    unemployment_rate_dm = unemployment_rate - mean(unemployment_rate, na.rm = TRUE),
    investment_rate_dm = investment_rate - mean(investment_rate, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  summarise(
    gdp_pc_growth = sd(gdp_pc_growth_dm, na.rm = TRUE),
    migration_rate = sd(migration_rate_dm, na.rm = TRUE),
    unemployment_rate = sd(unemployment_rate_dm, na.rm = TRUE),
    investment_rate = sd(investment_rate_dm, na.rm = TRUE)
  ) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Within SD") %>%
  mutate(`Within SD` = round(`Within SD`, 2))

# Combine all
var_decomp_table <- variance_decomp %>%
  left_join(between_var, by = "Variable") %>%
  left_join(within_var, by = "Variable")

kable(var_decomp_table, caption = "Variance Decomposition: Overall, Between, and Within") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  footnote(
    general = "Between SD: cross-country variation; Within SD: within-country (temporal) variation",
    general_title = "Note: "
  )
Variance Decomposition: Overall, Between, and Within
Variable Overall Mean Overall SD Between SD Within SD
gdp_pc_growth 2.27 3.75 1.51 3.44
investment_rate 13.10 4.10 3.50 2.23
migration_rate 2.19 3.48 2.53 2.43
unemployment_rate 7.60 4.09 3.54 2.15
Note:
Between SD: cross-country variation; Within SD: within-country (temporal) variation

Interpretation:

  • If Between SD > Within SD: Most variation is across countries → Random Effects may be appropriate
  • If Within SD > Between SD: Most variation is over time → Fixed Effects can identify effects well

6.3 Distribution of Variables

panel_data %>%
  select(gdp_pc_growth, migration_rate, unemployment_rate, investment_rate) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = Variable, y = Value, fill = Variable)) +
  geom_boxplot(alpha = 0.7, outlier.color = "red", outlier.shape = 1) +
  geom_jitter(width = 0.2, alpha = 0.1, size = 0.5) +
  facet_wrap(~ Variable, scales = "free", ncol = 2) +
  labs(
    title = "Distribution of Key Variables",
    subtitle = "Boxplots with individual observations",
    x = "",
    y = "Value"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    strip.text = element_text(face = "bold"),
    plot.title = element_text(face = "bold"),
    axis.text.x = element_blank()
  ) +
  scale_fill_brewer(palette = "Set2")

6.4 Statistics by Country

country_stats <- panel_data %>%
  group_by(Country = country_name) %>%
  summarise(
    `Years` = n(),
    `Avg GDP Growth` = round(mean(gdp_pc_growth, na.rm = TRUE), 2),
    `SD GDP Growth` = round(sd(gdp_pc_growth, na.rm = TRUE), 2),
    `Avg Migration Rate` = round(mean(migration_rate, na.rm = TRUE), 2),
    `Avg Unemployment` = round(mean(unemployment_rate, na.rm = TRUE), 2),
    `Avg Investment` = round(mean(investment_rate, na.rm = TRUE), 2),
    .groups = "drop"
  ) %>%
  arrange(desc(`Avg GDP Growth`))

kable(country_stats, caption = "Summary Statistics by Country (2014-2023)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  scroll_box(height = "400px")
Summary Statistics by Country (2014-2023)
Country Years Avg GDP Growth SD GDP Growth Avg Migration Rate Avg Unemployment Avg Investment
Ireland 10 6.71 7.05 4.17 6.86 26.22
Romania 10 3.98 3.07 -0.83 6.34 13.02
Malta 10 3.90 5.15 9.82 4.36 13.02
Bulgaria 9 3.84 2.77 0.35 7.24 13.05
Poland 10 3.78 2.23 0.13 4.77 10.05
Croatia 10 3.74 5.18 -1.44 10.03 12.90
Lithuania 10 3.55 2.41 0.55 7.58 13.04
Cyprus 10 3.38 4.03 3.52 9.77 8.24
Hungary 10 3.34 3.36 0.99 4.58 14.76
Latvia 10 2.74 3.00 -0.92 8.20 14.09
Slovenia 10 2.68 3.18 1.34 6.03 11.54
Slovakia 10 2.35 2.44 0.15 7.99 12.92
Czechia 10 1.88 3.04 2.89 3.25 15.50
Portugal 10 1.88 4.07 2.27 8.86 12.40
Estonia 10 1.85 4.01 3.03 6.13 15.89
Greece 10 1.63 4.69 -0.08 19.08 6.73
Spain 10 1.52 5.11 3.18 16.84 13.52
Netherlands 10 1.36 2.76 3.01 5.47 10.68
Denmark 10 1.27 2.09 1.70 5.54 13.88
Italy 10 1.21 4.44 1.30 10.30 10.57
Belgium 10 1.15 2.74 2.39 6.72 15.23
Sweden 10 0.91 1.98 3.10 7.54 16.07
Germany 10 0.80 2.26 3.66 3.60 11.80
France 10 0.76 3.53 1.05 8.80 11.90
Austria 10 0.58 3.21 3.59 5.66 15.24
Finland 10 0.54 1.86 1.77 7.94 12.68
Luxembourg 10 0.02 2.39 8.32 5.75 8.88

6.5 Statistics by Year

year_stats <- panel_data %>%
  group_by(Year = time) %>%
  summarise(
    Countries = n(),
    `Avg GDP Growth` = round(mean(gdp_pc_growth, na.rm = TRUE), 2),
    `SD GDP Growth` = round(sd(gdp_pc_growth, na.rm = TRUE), 2),
    `Avg Migration Rate` = round(mean(migration_rate, na.rm = TRUE), 2),
    `Avg Unemployment` = round(mean(unemployment_rate, na.rm = TRUE), 2),
    `Avg Investment` = round(mean(investment_rate, na.rm = TRUE), 2),
    .groups = "drop"
  )

kable(year_stats, caption = "Summary Statistics by Year (EU-27 Average)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Summary Statistics by Year (EU-27 Average)
Year Countries Avg GDP Growth SD GDP Growth Avg Migration Rate Avg Unemployment Avg Investment
2014 27 2.09 2.15 0.94 10.84 12.10
2015 27 3.33 3.97 1.22 9.99 12.51
2016 27 2.34 1.39 1.18 8.99 13.05
2017 27 3.73 2.43 1.48 7.87 13.17
2018 27 3.05 1.90 1.98 6.82 12.86
2019 27 2.48 1.53 2.10 6.21 14.27
2020 27 -4.38 3.55 1.57 7.02 13.53
2021 27 7.03 2.70 1.80 6.64 13.01
2022 27 2.75 2.67 6.10 5.77 13.28
2023 26 0.17 2.41 3.60 5.82 13.27

Notable observations:

  • 2020: COVID-19 pandemic caused significant GDP decline across all countries
  • 2021: Strong recovery (“bounce-back effect”)
  • 2022: Recovery continued despite energy crisis and inflation

6.6 Correlation Matrix

cor_vars <- panel_data %>%
  select(gdp_pc_growth, migration_rate, unemployment_rate, investment_rate) %>%
  na.omit()

cor_matrix <- cor(cor_vars)

corrplot(cor_matrix, 
         method = "color", 
         type = "upper",
         addCoef.col = "black",
         tl.col = "black",
         tl.srt = 45,
         col = colorRampPalette(c("#D73027", "white", "#1A9850"))(100),
         title = "Correlation Matrix",
         mar = c(0, 0, 2, 0))

Interpretation: - Negative correlation between unemployment and GDP growth suggests that higher unemployment is associated with lower economic growth. - Investment rate typically shows a positive correlation with GDP growth, reflecting the importance of capital formation. - The relationship between migration and GDP growth can be examined further in the regression analysis.

7 Data Visualization

7.1 GDP per Capita Growth Over Time

ggplot(panel_data, aes(x = time, y = gdp_pc_growth, color = country_name)) +
  geom_line(alpha = 0.7, linewidth = 0.8) +
  geom_point(size = 1.5, alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray40", linewidth = 0.8) +
  labs(
    title = "GDP per Capita Growth in EU-27 Countries (2014-2023)",
    subtitle = "Annual percentage change (log difference Ă— 100)",
    x = "Year",
    y = "GDP per Capita Growth (%)",
    color = "Country"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "right",
    legend.text = element_text(size = 8),
    plot.title = element_text(face = "bold")
  ) +
  scale_x_continuous(breaks = seq(2014, 2023, 1)) +
  scale_color_viridis_d(option = "turbo")

7.3 Average GDP Growth by Country

avg_growth <- panel_data %>%
  group_by(geo, country_name) %>%
  summarise(
    avg_growth = mean(gdp_pc_growth, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_growth))

ggplot(avg_growth, aes(x = reorder(country_name, avg_growth), y = avg_growth, fill = avg_growth)) +
  geom_col() +
  coord_flip() +
  geom_hline(yintercept = mean(avg_growth$avg_growth), linetype = "dashed", color = "red", linewidth = 0.8) +
  scale_fill_gradient2(low = "#D73027", mid = "#FFFFBF", high = "#1A9850", midpoint = 0) +
  labs(
    title = "Average GDP per Capita Growth by Country (2014-2023)",
    subtitle = "Red dashed line = EU-27 average",
    x = "",
    y = "Average Annual GDP Growth (%)",
    fill = "Growth"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold")
  )

7.4 Scatter Plots: Bivariate Relationships

7.4.1 Migration Rate vs. GDP Growth

ggplot(panel_data, aes(x = migration_rate, y = gdp_pc_growth)) +
  geom_point(aes(color = country_name), alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "black", linetype = "dashed", linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dotted", color = "gray50") +
  geom_vline(xintercept = 0, linetype = "dotted", color = "gray50") +
  labs(
    title = "Migration Rate vs. GDP per Capita Growth",
    subtitle = "Each point = one country-year observation; dashed line = OLS fit",
    x = "Migration Rate (per 1,000 inhabitants)",
    y = "GDP per Capita Growth (%)",
    color = "Country"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "right",
    legend.text = element_text(size = 7),
    plot.title = element_text(face = "bold")
  ) +
  scale_color_viridis_d(option = "turbo")

7.4.2 Unemployment Rate vs. GDP Growth

ggplot(panel_data, aes(x = unemployment_rate, y = gdp_pc_growth)) +
  geom_point(aes(color = country_name), alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "black", linetype = "dashed", linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dotted", color = "gray50") +
  labs(
    title = "Unemployment Rate vs. GDP per Capita Growth",
    subtitle = "Each point = one country-year observation; dashed line = OLS fit",
    x = "Unemployment Rate (%)",
    y = "GDP per Capita Growth (%)",
    color = "Country"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "right",
    legend.text = element_text(size = 7),
    plot.title = element_text(face = "bold")
  ) +
  scale_color_viridis_d(option = "turbo")

7.4.3 Investment Rate vs. GDP Growth

ggplot(panel_data, aes(x = investment_rate, y = gdp_pc_growth)) +
  geom_point(aes(color = country_name), alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "black", linetype = "dashed", linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dotted", color = "gray50") +
  labs(
    title = "Investment Rate vs. GDP per Capita Growth",
    subtitle = "Each point = one country-year observation; dashed line = OLS fit",
    x = "Investment Rate (% of GDP)",
    y = "GDP per Capita Growth (%)",
    color = "Country"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "right",
    legend.text = element_text(size = 7),
    plot.title = element_text(face = "bold")
  ) +
  scale_color_viridis_d(option = "turbo")

8 Econometric Analysis

8.1 Panel Data Setup

pdata <- pdata.frame(
  panel_data,
  index = c("geo", "time")
)
pdim(pdata)
## Unbalanced Panel: n = 27, T = 9-10, N = 269

8.2 Pooled OLS Model

pooled_ols <- plm(
  gdp_pc_growth ~ migration_rate + unemployment_rate + investment_rate,
  data = pdata,
  model = "pooling"
)

summary(pooled_ols)
## Pooling Model
## 
## Call:
## plm(formula = gdp_pc_growth ~ migration_rate + unemployment_rate + 
##     investment_rate, data = pdata, model = "pooling")
## 
## Unbalanced Panel: n = 27, T = 9-10, N = 269
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -14.172765  -1.482294   0.024949   1.923257  17.995276 
## 
## Coefficients:
##                    Estimate Std. Error t-value Pr(>|t|)  
## (Intercept)        1.581061   1.033860  1.5293  0.12739  
## migration_rate    -0.137823   0.068770 -2.0041  0.04608 *
## unemployment_rate -0.041585   0.060438 -0.6880  0.49202  
## investment_rate    0.099503   0.057474  1.7313  0.08457 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    3767.2
## Residual Sum of Squares: 3666.1
## R-Squared:      0.026842
## Adj. R-Squared: 0.015825
## F-statistic: 2.43644 on 3 and 265 DF, p-value: 0.065094

8.3 Fixed Effects Model

fe_model <- plm(
  gdp_pc_growth ~ migration_rate + unemployment_rate + investment_rate,
  data = pdata,
  model = "within"
)

summary(fe_model)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = gdp_pc_growth ~ migration_rate + unemployment_rate + 
##     investment_rate, data = pdata, model = "within")
## 
## Unbalanced Panel: n = 27, T = 9-10, N = 269
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -13.83399  -0.68165   0.35786   1.33627  13.65233 
## 
## Coefficients:
##                    Estimate Std. Error t-value Pr(>|t|)
## migration_rate    -0.159450   0.098284 -1.6223   0.1060
## unemployment_rate -0.104525   0.112914 -0.9257   0.3555
## investment_rate   -0.107810   0.100876 -1.0687   0.2863
## 
## Total Sum of Squares:    3175.1
## Residual Sum of Squares: 3125
## R-Squared:      0.015784
## Adj. R-Squared: -0.10364
## F-statistic: 1.2776 on 3 and 239 DF, p-value: 0.28269

8.4 Random Effects Model

re_model <- plm(
  gdp_pc_growth ~ migration_rate + unemployment_rate + investment_rate,
  data = pdata,
  model = "random"
)

summary(re_model)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = gdp_pc_growth ~ migration_rate + unemployment_rate + 
##     investment_rate, data = pdata, model = "random")
## 
## Unbalanced Panel: n = 27, T = 9-10, N = 269
## 
## Effects:
##                   var std.dev share
## idiosyncratic 13.0753  3.6160  0.95
## individual     0.6927  0.8323  0.05
## theta:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1771  0.1915  0.1915  0.1910  0.1915  0.1915 
## 
## Residuals:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -14.0240  -1.2629   0.0745  -0.0006   1.7711  17.5224 
## 
## Coefficients:
##                    Estimate Std. Error z-value Pr(>|z|)  
## (Intercept)        2.054506   1.159880  1.7713  0.07651 .
## migration_rate    -0.142243   0.074254 -1.9156  0.05541 .
## unemployment_rate -0.052171   0.068218 -0.7648  0.44441  
## investment_rate    0.070375   0.064358  1.0935  0.27418  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    3563.4
## Residual Sum of Squares: 3495.8
## R-Squared:      0.018991
## Adj. R-Squared: 0.0078856
## Chisq: 5.07167 on 3 DF, p-value: 0.16662

9 Diagnostic Tests

9.1 Hausman Test: FE vs. RE

The Hausman test examines whether the country-specific effects are correlated with the regressors.

  • H0: Random effects model is consistent and efficient (use RE)
  • H1: Fixed effects model is consistent (use FE)
hausman_test <- phtest(fe_model, re_model)
hausman_test
## 
##  Hausman Test
## 
## data:  gdp_pc_growth ~ migration_rate + unemployment_rate + investment_rate
## chisq = 5.3943, df = 3, p-value = 0.1451
## alternative hypothesis: one model is inconsistent
## **Interpretation:** The Hausman test is not significant (p = 0.1451 ). We fail to reject H0 and can use the **Random Effects** model.
##  This suggests that the random effects estimator is consistent and more efficient.

9.2 F-Test for Individual Effects

Testing whether fixed effects are jointly significant (i.e., whether country-specific intercepts matter):

  • H0: All individual effects are zero (Pooled OLS is adequate)
  • H1: Individual effects are significant (Fixed Effects is needed)
f_test <- pFtest(fe_model, pooled_ols)
f_test
## 
##  F test for individual effects
## 
## data:  gdp_pc_growth ~ migration_rate + unemployment_rate + investment_rate
## F = 1.5917, df1 = 26, df2 = 239, p-value = 0.03865
## alternative hypothesis: significant effects
## **Interpretation:** The F-test is significant (p = 3.87e-02 ). Country-specific effects are jointly significant.
##  Pooled OLS is inappropriate; we need to account for individual heterogeneity.

9.3 Breusch-Pagan LM Test for Random Effects

Testing whether random effects are present:

  • H0: Variance of individual effects is zero (Pooled OLS is adequate)
  • H1: Random effects are present
bp_lm_test <- plmtest(pooled_ols, type = "bp")
bp_lm_test
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  gdp_pc_growth ~ migration_rate + unemployment_rate + investment_rate
## chisq = 1.5204, df = 1, p-value = 0.2176
## alternative hypothesis: significant effects
## **Interpretation:** The Breusch-Pagan LM test is not significant (p = 0.2176 ).
##  No evidence of significant individual effects.

9.4 Test for Heteroskedasticity

9.4.1 Breusch-Pagan Test

Testing whether the variance of residuals is constant across observations:

  • H0: Homoskedasticity (constant variance)
  • H1: Heteroskedasticity (non-constant variance)
bp_hetero <- bptest(fe_model)
bp_hetero
## 
##  studentized Breusch-Pagan test
## 
## data:  fe_model
## BP = 1.6987, df = 3, p-value = 0.6372
## **Interpretation:** The Breusch-Pagan test is not significant (p = 0.6372 ).
##  No strong evidence of heteroskedasticity.

9.4.2 Residuals vs. Fitted Plot

# Extract residuals and fitted values
fe_residuals <- residuals(fe_model)
fe_fitted <- fitted(fe_model)

resid_df <- data.frame(
  Fitted = fe_fitted,
  Residuals = fe_residuals
)

ggplot(resid_df, aes(x = Fitted, y = Residuals)) +
  geom_point(alpha = 0.5, color = "steelblue") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  geom_smooth(method = "loess", se = TRUE, color = "darkred", linewidth = 0.8) +
  labs(
    title = "Residuals vs. Fitted Values (Fixed Effects Model)",
    subtitle = "Red line should be flat around zero for homoskedasticity",
    x = "Fitted Values",
    y = "Residuals"
  ) +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))

9.5 Test for Serial Correlation

9.5.1 Breusch-Godfrey/Wooldridge Test

Testing for first-order serial correlation in panel models:

  • H0: No serial correlation
  • H1: Serial correlation present
bg_test <- pbgtest(fe_model, order = 1)
bg_test
## 
##  Breusch-Godfrey/Wooldridge test for serial correlation in panel models
## 
## data:  gdp_pc_growth ~ migration_rate + unemployment_rate + investment_rate
## chisq = 20.55, df = 1, p-value = 5.81e-06
## alternative hypothesis: serial correlation in idiosyncratic errors
## **Interpretation:** The Breusch-Godfrey test is significant (p = 0 ).
##  There is evidence of first-order serial correlation.
##  **Newey-West or cluster-robust standard errors are recommended.**

9.5.2 Durbin-Watson Test

dw_test <- pdwtest(fe_model)
dw_test
## 
##  Durbin-Watson test for serial correlation in panel models
## 
## data:  gdp_pc_growth ~ migration_rate + unemployment_rate + investment_rate
## DW = 2.5232, p-value = 1
## alternative hypothesis: serial correlation in idiosyncratic errors

Note: Durbin-Watson statistic close to 2 indicates no autocorrelation. Values < 2 suggest positive autocorrelation, > 2 suggest negative autocorrelation.

9.6 Cross-Sectional Dependence

Testing whether residuals are correlated across countries (common in macro panels):

  • H0: No cross-sectional dependence
  • H1: Cross-sectional dependence present
cd_test <- pcdtest(fe_model, test = "cd")
cd_test
## 
##  Pesaran CD test for cross-sectional dependence in panels
## 
## data:  gdp_pc_growth ~ migration_rate + unemployment_rate + investment_rate
## z = 44.224, p-value < 2.2e-16
## alternative hypothesis: cross-sectional dependence
## **Interpretation:** The Pesaran CD test is significant (p = 0 ).
##  There is evidence of cross-sectional dependence.
##  This is common in EU data due to shared economic shocks. Consider Driscoll-Kraay standard errors.

9.7 Summary of Diagnostic Tests

diagnostic_results <- data.frame(
  Test = c("Hausman (FE vs. RE)", 
           "F-Test (Individual Effects)", 
           "Breusch-Pagan LM (Random Effects)",
           "Breusch-Pagan (Heteroskedasticity)", 
           "Breusch-Godfrey (Serial Correlation)",
           "Pesaran CD (Cross-sectional Dependence)"),
  Statistic = c(round(hausman_test$statistic, 3),
                round(f_test$statistic, 3),
                round(bp_lm_test$statistic, 3),
                round(bp_hetero$statistic, 3),
                round(bg_test$statistic, 3),
                round(cd_test$statistic, 3)),
  `P-Value` = c(round(hausman_test$p.value, 4),
                format(f_test$p.value, scientific = TRUE, digits = 2),
                format(bp_lm_test$p.value, scientific = TRUE, digits = 2),
                round(bp_hetero$p.value, 4),
                round(bg_test$p.value, 4),
                round(cd_test$p.value, 4)),
  Decision = c(ifelse(hausman_test$p.value < 0.05, "Use FE", "Use RE"),
               ifelse(f_test$p.value < 0.05, "FE needed", "Pooled OK"),
               ifelse(bp_lm_test$p.value < 0.05, "RE needed", "Pooled OK"),
               ifelse(bp_hetero$p.value < 0.05, "Robust SE needed", "OK"),
               ifelse(bg_test$p.value < 0.05, "Robust SE needed", "OK"),
               ifelse(cd_test$p.value < 0.05, "Consider DK-SE", "OK"))
)

kable(diagnostic_results, caption = "Summary of Diagnostic Tests") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  column_spec(4, bold = TRUE)
Summary of Diagnostic Tests
Test Statistic P.Value Decision
Hausman (FE vs. RE) 5.394 0.1451 Use RE
F-Test (Individual Effects) 1.592 3.9e-02 FE needed
Breusch-Pagan LM (Random Effects) 1.520 2.2e-01 Pooled OK
Breusch-Pagan (Heteroskedasticity) 1.699 0.6372 OK
Breusch-Godfrey (Serial Correlation) 20.550 0 Robust SE needed
Pesaran CD (Cross-sectional Dependence) 44.224 0 Consider DK-SE

9.8 Robust Standard Errors

Given the diagnostic test results, we compute cluster-robust standard errors for all three models to ensure valid inference under heteroskedasticity and within-cluster correlation.

9.8.1 Robust Pooled OLS

# Cluster-robust standard errors for Pooled OLS
pooled_robust <- coeftest(
  pooled_ols,
  vcov = vcovHC(pooled_ols, type = "HC1", cluster = "group")
)

pooled_robust
## 
## t test of coefficients:
## 
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        1.581061   0.949488  1.6652  0.09706 .
## migration_rate    -0.137823   0.091585 -1.5049  0.13355  
## unemployment_rate -0.041585   0.039806 -1.0447  0.29712  
## investment_rate    0.099503   0.054925  1.8116  0.07118 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

9.8.2 Robust Fixed Effects

# Cluster-robust standard errors for Fixed Effects
fe_robust <- coeftest(
  fe_model,
  vcov = vcovHC(fe_model, type = "HC1", cluster = "group")
)

fe_robust
## 
## t test of coefficients:
## 
##                    Estimate Std. Error t value Pr(>|t|)  
## migration_rate    -0.159450   0.082024 -1.9439  0.05308 .
## unemployment_rate -0.104525   0.075868 -1.3777  0.16958  
## investment_rate   -0.107810   0.059807 -1.8026  0.07270 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

9.8.3 Robust Random Effects

# Cluster-robust standard errors for Random Effects
re_robust <- coeftest(
  re_model,
  vcov = vcovHC(re_model, type = "HC1", cluster = "group")
)

re_robust
## 
## t test of coefficients:
## 
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)        2.054506   0.788969  2.6040 0.009733 **
## migration_rate    -0.142243   0.084358 -1.6862 0.092937 . 
## unemployment_rate -0.052171   0.040426 -1.2905 0.197994   
## investment_rate    0.070375   0.041161  1.7097 0.088488 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

9.8.4 Newey-West Standard Errors (HAC)

Heteroskedasticity and Autocorrelation Consistent (HAC) standard errors account for both heteroskedasticity and serial correlation:

# Newey-West for all models
pooled_nw <- coeftest(pooled_ols, vcov = vcovNW(pooled_ols))
fe_nw <- coeftest(fe_model, vcov = vcovNW(fe_model))
re_nw <- coeftest(re_model, vcov = vcovNW(re_model))

9.8.5 Comparison of Standard Errors Across Models

# Create comparison table for all models
se_comparison <- data.frame(
  Variable = rep(c("Migration Rate", "Unemployment Rate", "Investment Rate"), 3),
  Model = rep(c("Pooled OLS", "Fixed Effects", "Random Effects"), each = 3),
  `Conventional SE` = c(
    coef(summary(pooled_ols))[-1, "Std. Error"],
    coef(summary(fe_model))[, "Std. Error"],
    coef(summary(re_model))[-1, "Std. Error"]
  ),
  `Cluster-Robust SE` = c(
    pooled_robust[-1, "Std. Error"],
    fe_robust[, "Std. Error"],
    re_robust[-1, "Std. Error"]
  ),
  `Newey-West SE` = c(
    pooled_nw[-1, "Std. Error"],
    fe_nw[, "Std. Error"],
    re_nw[-1, "Std. Error"]
  )
) %>%
  mutate(
    `Conventional SE` = round(`Conventional.SE`, 4),
    `Cluster-Robust SE` = round(`Cluster.Robust.SE`, 4),
    `Newey-West SE` = round(`Newey.West.SE`, 4),
    `SE Inflation (%)` = round((`Cluster.Robust.SE` / `Conventional.SE` - 1) * 100, 1)
  ) %>%
  select(Variable, Model, `Conventional SE`, `Cluster-Robust SE`, `Newey-West SE`, `SE Inflation (%)`)

kable(se_comparison, caption = "Comparison of Standard Error Estimates Across All Models") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  pack_rows("Pooled OLS", 1, 3) %>%
  pack_rows("Fixed Effects", 4, 6) %>%
  pack_rows("Random Effects", 7, 9)
Comparison of Standard Error Estimates Across All Models
Variable Model Conventional SE Cluster-Robust SE Newey-West SE SE Inflation (%)
Pooled OLS
Migration Rate Pooled OLS 0.0688 0.0916 0.0640 33.2
Unemployment Rate Pooled OLS 0.0604 0.0398 0.0531 -34.1
Investment Rate Pooled OLS 0.0575 0.0549 0.0549 -4.4
Fixed Effects
Migration Rate Fixed Effects 0.0983 0.0820 0.0883 -16.5
Unemployment Rate Fixed Effects 0.1129 0.0759 0.0940 -32.8
Investment Rate Fixed Effects 0.1009 0.0598 0.1126 -40.7
Random Effects
Migration Rate Random Effects 0.0743 0.0844 0.0648 13.6
Unemployment Rate Random Effects 0.0682 0.0404 0.0577 -40.7
Investment Rate Random Effects 0.0644 0.0412 0.0516 -36.0

Interpretation: - SE Inflation > 0%: Robust SE larger than conventional → heteroskedasticity/clustering present - Large inflation (>50%): Strong evidence of model misspecification; use robust SE for inference - If inflation is similar across models, the issue is systematic (e.g., country-level clustering)

10 Model Comparison

10.1 Conventional Standard Errors

stargazer(
  pooled_ols,
  fe_model,
  re_model,
  type = "html",
  title = "Panel Regression Results (Conventional Standard Errors)",
  dep.var.labels = "GDP per capita growth",
  column.labels = c("Pooled OLS", "Fixed Effects", "Random Effects")
)
Panel Regression Results (Conventional Standard Errors)
Dependent variable:
GDP per capita growth
Pooled OLS Fixed Effects Random Effects
(1) (2) (3)
migration_rate -0.138** -0.159 -0.142*
(0.069) (0.098) (0.074)
unemployment_rate -0.042 -0.105 -0.052
(0.060) (0.113) (0.068)
investment_rate 0.100* -0.108 0.070
(0.057) (0.101) (0.064)
Constant 1.581 2.055*
(1.034) (1.160)
Observations 269 269 269
R2 0.027 0.016 0.019
Adjusted R2 0.016 -0.104 0.008
F Statistic 2.436* (df = 3; 265) 1.278 (df = 3; 239) 5.072
Note: p<0.1; p<0.05; p<0.01

10.2 Robust Standard Errors Comparison

stargazer(
  pooled_ols,
  fe_model,
  re_model,
  type = "html",
  title = "Panel Regression Results (Cluster-Robust Standard Errors)",
  dep.var.labels = "GDP per capita growth",
  column.labels = c("Pooled OLS", "Fixed Effects", "Random Effects"),
  se = list(
    pooled_robust[-1, "Std. Error"],
    fe_robust[, "Std. Error"],
    re_robust[-1, "Std. Error"]
  ),
  p = list(
    pooled_robust[-1, "Pr(>|t|)"],
    fe_robust[, "Pr(>|t|)"],
    re_robust[-1, "Pr(>|t|)"]
  ),
  notes = "Cluster-robust standard errors in parentheses"
)
Panel Regression Results (Cluster-Robust Standard Errors)
Dependent variable:
GDP per capita growth
Pooled OLS Fixed Effects Random Effects
(1) (2) (3)
migration_rate -0.138 -0.159* -0.142*
(0.092) (0.082) (0.084)
unemployment_rate -0.042 -0.105 -0.052
(0.040) (0.076) (0.040)
investment_rate 0.100* -0.108* 0.070*
(0.055) (0.060) (0.041)
Constant 1.581 2.055
Observations 269 269 269
R2 0.027 0.016 0.019
Adjusted R2 0.016 -0.104 0.008
F Statistic 2.436* (df = 3; 265) 1.278 (df = 3; 239) 5.072
Note: p<0.1; p<0.05; p<0.01
Cluster-robust standard errors in parentheses

10.3 Summary: Robust Results

# Create a clean summary table with robust results
robust_results <- data.frame(
  Variable = c("Migration Rate", "Unemployment Rate", "Investment Rate"),
  
  # Pooled OLS with robust SE
  `Pooled Coef` = round(coef(pooled_ols)[-1], 3),
  `Pooled SE` = round(pooled_robust[-1, "Std. Error"], 3),
  `Pooled Sig` = ifelse(pooled_robust[-1, "Pr(>|t|)"] < 0.01, "***",
                        ifelse(pooled_robust[-1, "Pr(>|t|)"] < 0.05, "**",
                               ifelse(pooled_robust[-1, "Pr(>|t|)"] < 0.1, "*", ""))),
  
  # Fixed Effects with robust SE
  `FE Coef` = round(coef(fe_model), 3),
  `FE SE` = round(fe_robust[, "Std. Error"], 3),
  `FE Sig` = ifelse(fe_robust[, "Pr(>|t|)"] < 0.01, "***",
                    ifelse(fe_robust[, "Pr(>|t|)"] < 0.05, "**",
                           ifelse(fe_robust[, "Pr(>|t|)"] < 0.1, "*", ""))),
  
  # Random Effects with robust SE
  `RE Coef` = round(coef(re_model)[-1], 3),
  `RE SE` = round(re_robust[-1, "Std. Error"], 3),
  `RE Sig` = ifelse(re_robust[-1, "Pr(>|t|)"] < 0.01, "***",
                    ifelse(re_robust[-1, "Pr(>|t|)"] < 0.05, "**",
                           ifelse(re_robust[-1, "Pr(>|t|)"] < 0.1, "*", "")))
)

# Format for display
robust_display <- robust_results %>%
  mutate(
    `Pooled OLS` = paste0(Pooled.Coef, Pooled.Sig, " (", Pooled.SE, ")"),
    `Fixed Effects` = paste0(FE.Coef, FE.Sig, " (", FE.SE, ")"),
    `Random Effects` = paste0(RE.Coef, RE.Sig, " (", RE.SE, ")")
  ) %>%
  select(Variable, `Pooled OLS`, `Fixed Effects`, `Random Effects`)

kable(robust_display, caption = "Regression Results with Cluster-Robust Standard Errors") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  footnote(general = "Significance: *** p<0.01, ** p<0.05, * p<0.1. Cluster-robust SE in parentheses.")
Regression Results with Cluster-Robust Standard Errors
Variable Pooled OLS Fixed Effects Random Effects
migration_rate Migration Rate -0.138 (0.092) -0.159* (0.082) -0.142* (0.084)
unemployment_rate Unemployment Rate -0.042 (0.04) -0.105 (0.076) -0.052 (0.04)
investment_rate Investment Rate 0.1* (0.055) -0.108* (0.06) 0.07* (0.041)
Note:
Significance: *** p<0.01, ** p<0.05, * p<0.1. Cluster-robust SE in parentheses.

11 Within-Country Variation

For the fixed effects model to identify effects, we need sufficient within-country variation:

panel_data %>%
  group_by(geo) %>%
  summarise(
    sd_migration = sd(migration_rate, na.rm = TRUE),
    sd_unemp = sd(unemployment_rate, na.rm = TRUE),
    sd_invest = sd(investment_rate, na.rm = TRUE)
  ) %>%
  summary()
##      geo             sd_migration      sd_unemp        sd_invest      
##  Length:27          Min.   :0.102   Min.   :0.5869   Min.   : 0.3102  
##  Class :character   1st Qu.:1.140   1st Qu.:0.8744   1st Qu.: 0.6561  
##  Mode  :character   Median :1.551   Median :1.4891   Median : 0.8747  
##                     Mean   :2.065   Mean   :1.9035   Mean   : 1.3787  
##                     3rd Qu.:2.708   3rd Qu.:2.5276   3rd Qu.: 1.3127  
##                     Max.   :5.345   Max.   :5.2952   Max.   :10.6529

12 Results Summary

12.1 Key Findings

# Create a comprehensive results table with ROBUST standard errors for all models
# Extract coefficients from preferred model (based on Hausman test)
preferred_model <- ifelse(hausman_test$p.value < 0.05, "Fixed Effects", "Random Effects")

# Get coefficients with robust SE for ALL models
results_table <- data.frame(
  Variable = c("Migration Rate (per 1,000)", "Unemployment Rate (%)", "Investment Rate (% GDP)"),
  
  `Pooled OLS` = paste0(
    round(coef(pooled_ols)[-1], 3), 
    ifelse(pooled_robust[-1, "Pr(>|t|)"] < 0.01, "***",
           ifelse(pooled_robust[-1, "Pr(>|t|)"] < 0.05, "**",
                  ifelse(pooled_robust[-1, "Pr(>|t|)"] < 0.1, "*", "")))
  ),
  
  `Fixed Effects` = paste0(
    round(coef(fe_model), 3),
    ifelse(fe_robust[, "Pr(>|t|)"] < 0.01, "***",
           ifelse(fe_robust[, "Pr(>|t|)"] < 0.05, "**",
                  ifelse(fe_robust[, "Pr(>|t|)"] < 0.1, "*", "")))
  ),
  
  `Random Effects` = paste0(
    round(coef(re_model)[-1], 3),
    ifelse(re_robust[-1, "Pr(>|t|)"] < 0.01, "***",
           ifelse(re_robust[-1, "Pr(>|t|)"] < 0.05, "**",
                  ifelse(re_robust[-1, "Pr(>|t|)"] < 0.1, "*", "")))
  )
)

kable(results_table, caption = "Coefficient Estimates Across Models (Cluster-Robust SE)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  footnote(general = "Significance: *** p<0.01, ** p<0.05, * p<0.1. All models use cluster-robust SE.")
Coefficient Estimates Across Models (Cluster-Robust SE)
Variable Pooled.OLS Fixed.Effects Random.Effects
Migration Rate (per 1,000) -0.138 -0.159* -0.142*
Unemployment Rate (%) -0.042 -0.105 -0.052
Investment Rate (% GDP) 0.1* -0.108* 0.07*
Note:
Significance: *** p<0.01, ** p<0.05, * p<0.1. All models use cluster-robust SE.

12.2 Interpretation of Results

Note: All interpretations below use the preferred Random Effects model with cluster-robust standard errors.

12.2.1 Migration Rate

The coefficient for migration rate is -0.142 and is not statistically significant at the 5% level (p = 0.0929).

Economic interpretation: There is no statistically significant relationship between migration and GDP growth after controlling for country-specific effects. This suggests that within-country variation in migration does not significantly predict economic growth.

12.2.2 Unemployment Rate

The coefficient for unemployment rate is -0.052 and is not statistically significant at the 5% level (p = 0.198).

Economic interpretation: The relationship between unemployment and GDP growth is not statistically significant in the Random Effects specification. This may be due to the strong correlation between these variables being captured by country-specific effects.

12.2.3 Investment Rate

The coefficient for investment rate is 0.07 and is not statistically significant at the 5% level (p = 0.0885).

Economic interpretation: The relationship between investment and GDP growth is not statistically significant in the Random Effects specification. This may suggest that within-country changes in investment rates do not immediately translate into growth, potentially due to time lags in capital formation effects.

12.3 Model Selection Summary

Based on the diagnostic tests:

  1. Hausman Test: Not significant (p = 0.1451) → Random Effects preferred

  2. F-Test for Individual Effects: Significant → Country-specific effects matter, Pooled OLS inappropriate

  3. Heteroskedasticity: Not detected

  4. Serial Correlation: Present → Use HAC or cluster-robust standard errors

  5. Cross-Sectional Dependence: Present → Consider Driscoll-Kraay standard errors for inference

Recommended Model: Random Effects with cluster-robust standard errors.

13 Limitations

This analysis has several limitations that should be considered when interpreting the results:

13.1 Methodological Limitations

  1. Endogeneity:
    • Migration and unemployment may be endogenous to GDP growth (reverse causality)
    • Economic growth may attract migrants and reduce unemployment
    • Without instrumental variables, causal interpretation is limited
  2. Omitted Variable Bias:
    • Important determinants of growth are not included (education, capital investment, trade openness, institutional quality)
    • Time-varying confounders may bias the estimates
  3. Measurement Issues:
    • GDP per capita in PPS may not fully capture living standard differences
    • Migration data may undercount irregular migration
    • Annual data may miss within-year dynamics

13.2 Data Limitations

  1. Short Time Period:
    • 10 years (2014-2023) may not capture long-term structural relationships
    • Results may be sensitive to the specific time period chosen
  2. COVID-19 Shock:
    • Years 2020-2021 represent an extraordinary economic shock
    • May distort the underlying relationships between variables
# Visualize COVID impact
panel_data %>%
  mutate(covid_period = ifelse(time %in% c(2020, 2021), "COVID (2020-2021)", "Other Years")) %>%
  ggplot(aes(x = covid_period, y = gdp_pc_growth, fill = covid_period)) +
  geom_boxplot(alpha = 0.7) +
  geom_jitter(width = 0.2, alpha = 0.3, size = 1) +
  labs(
    title = "GDP Growth: COVID Period vs. Other Years",
    x = "",
    y = "GDP per Capita Growth (%)"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "none", plot.title = element_text(face = "bold")) +
  scale_fill_manual(values = c("COVID (2020-2021)" = "#E74C3C", "Other Years" = "#3498DB"))

  1. EU-27 Sample:
    • Results specific to EU countries, may not generalize to other regions
    • EU integration and common policies create specific dynamics

14 Conclusion

14.1 Summary of Findings

This study analyzed the determinants of GDP per capita growth in the EU-27 countries from 2014 to 2023 using panel data econometrics.

Main Results:

  1. Migration and Growth: The effect of migration on economic growth is not statistically significant after controlling for country fixed effects. Within-country variation in migration does not significantly predict economic growth.

  2. Unemployment and Growth: The relationship between unemployment and GDP growth is not statistically significant.

  3. Investment and Growth: The relationship between investment and GDP growth is not statistically significant.

  4. Model Selection: Based on the Hausman test, the Random Effects model is preferred. Country-specific effects are important for understanding growth dynamics in the EU.

  5. Robustness: Results are robust to different standard error corrections (cluster-robust, Newey-West).

14.2 Policy Implications

Based on these findings:

  • Migration Policy: The non-significant effect suggests that migration’s impact on growth may depend on context-specific factors not captured in aggregate data. Disaggregated analysis by skill level or sector may be informative.

  • General: Country-specific factors play an important role in growth dynamics, suggesting that one-size-fits-all policies may be suboptimal. National contexts should be considered in policy design.

14.3 Suggestions for Future Research

  1. Instrumental Variables: Use instruments for migration (e.g., historical migration networks, geographic factors) to address endogeneity

  2. Additional Controls: Include education, R&D spending, capital formation, trade openness, institutional quality

  3. Disaggregated Analysis: Examine migration by skill level, sector, or origin country

  4. Longer Time Series: Extend the analysis to capture long-term growth dynamics

  5. Regional Data: Use NUTS-2 regional data for more granular insights

  6. Dynamic Panel Models: Estimate GMM models to account for persistence in GDP growth


15 Appendix

15.1 Session Information

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS 26.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] C.UTF-8/C.UTF-8/C.UTF-8/C/C.UTF-8/C.UTF-8
## 
## time zone: Europe/Lisbon
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] moments_0.14.1   scales_1.4.0     kableExtra_1.4.0 corrplot_0.95   
##  [5] stargazer_5.2.3  sandwich_3.1-1   lmtest_0.9-40    zoo_1.8-14      
##  [9] plm_2.6-6        lubridate_1.9.4  forcats_1.0.0    stringr_1.5.1   
## [13] dplyr_1.1.4      purrr_1.0.4      readr_2.1.4      tidyr_1.3.1     
## [17] tibble_3.2.1     ggplot2_3.5.2    tidyverse_2.0.0  eurostat_4.0.0  
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1   viridisLite_0.4.2  farver_2.1.2       fastmap_1.2.0     
##  [5] digest_0.6.37      timechange_0.3.0   lifecycle_1.0.3    magrittr_2.0.3    
##  [9] compiler_4.3.1     rlang_1.1.1        sass_0.4.10        tools_4.3.1       
## [13] utf8_1.2.3         yaml_2.3.10        data.table_1.17.8  collapse_2.1.2    
## [17] knitr_1.50         labeling_0.4.3     bit_4.0.5          classInt_0.4-11   
## [21] curl_6.4.0         here_1.0.1         plyr_1.8.9         xml2_1.3.8        
## [25] RColorBrewer_1.1-3 KernSmooth_2.23-21 withr_2.5.0        grid_4.3.1        
## [29] fansi_1.0.4        ISOweek_0.6-2      e1071_1.7-16       MASS_7.3-60       
## [33] cli_3.6.1          crayon_1.5.2       rmarkdown_2.29     miscTools_0.6-28  
## [37] generics_0.1.4     rstudioapi_0.17.1  httr_1.4.7         tzdb_0.4.0        
## [41] bdsmatrix_1.3-7    readxl_1.4.3       cachem_1.1.0       proxy_0.4-27      
## [45] splines_4.3.1      regions_0.1.8      assertthat_0.2.1   parallel_4.3.1    
## [49] cellranger_1.1.0   vctrs_0.6.5        Matrix_1.5-4.1     jsonlite_2.0.0    
## [53] hms_1.1.3          bit64_4.0.5        Formula_1.2-5      systemfonts_1.2.3 
## [57] jquerylib_0.1.4    bibtex_0.5.1       glue_1.6.2         RefManageR_1.4.0  
## [61] stringi_1.8.7      countrycode_1.6.1  gtable_0.3.6       pillar_1.9.0      
## [65] rappdirs_0.3.3     htmltools_0.5.8.1  R6_2.5.1           httr2_1.1.2       
## [69] textshaping_1.0.1  maxLik_1.5-2.1     Rdpack_2.6.4       rprojroot_2.0.4   
## [73] vroom_1.6.3        evaluate_1.0.4     lattice_0.21-8     rbibutils_2.3     
## [77] backports_1.5.0    bslib_0.9.0        class_7.3-22       Rcpp_1.0.11       
## [81] svglite_2.2.1      nlme_3.1-162       mgcv_1.8-42        xfun_0.52         
## [85] pkgconfig_2.0.3

15.2 Data Sources

All data were retrieved from Eurostat on 2026-01-18:

15.3 Replication Code

The complete R code for this analysis is available in the accompanying .Rmd file. All results are fully reproducible using the Eurostat API.

15.4 Export Data for Poster

# Save data and models for poster visualization
saveRDS(panel_data, "panel_data.rds")
saveRDS(fe_model, "fe_model.rds")
saveRDS(re_model, "re_model.rds")

cat("Exported files for poster:\n")
## Exported files for poster:
cat("- panel_data.rds (", nrow(panel_data), " observations)\n", sep = "")
## - panel_data.rds (269 observations)
cat("- fe_model.rds (Fixed Effects model)\n")
## - fe_model.rds (Fixed Effects model)
cat("- re_model.rds (Random Effects model)\n")
## - re_model.rds (Random Effects model)